Imputation
We generated a plot to see all the missing values in the sample.
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:400px; overflow-x: scroll; width:100%">
library(dplyr)
library(ggplot2)
# Variables inspected for missingness; the same vector is reused further down
# to select the columns of the imputation data set.
# NOTE(review): "motivodeegreso_mod_imp" is listed twice. The vector is left
# untouched because other code reads it; it is de-duplicated where used below.
vector_variables <-
  c("row", "hash_key", "edad_al_ing_grupos", "estado_conyugal_2", "numero_de_hijos_mod", "num_hijos_trat_res_mod", "embarazo", "sus_principal_mod", "otras_sus1_mod", "freq_cons_sus_prin", "compromiso_biopsicosocial", "tipo_centro", "servicio_sal1", "tipo_de_plan_res", "edad_ini_sus_prin_grupos", "tenencia_de_la_vivienda_mod", "tipo_de_programa_2", "motivodeegreso_mod_imp", "dias_treat_imp_sin_na", "motivodeegreso_mod_imp", "dup", "duplicates_filtered")

# One-row table with the number of missing values in each variable.
# Counting NAs column-wise replaces the former rowwise() + ifelse() + sum()
# chain, which looped over every row to obtain the same totals.
missing.values <-
  CONS_C1_df_dup_SEP_2020_women %>%
  dplyr::ungroup() %>%
  dplyr::summarise(
    dplyr::across(dplyr::all_of(unique(vector_variables)), ~ sum(is.na(.x)))
  )
# t(missing.values)

# Bar chart with the percentage of missing values per variable.
plot_miss <-
  # Long format (one row per variable); factor levels keep the column order so
  # the x axis is not re-sorted alphabetically.
  data.frame(
    variable = factor(names(missing.values), levels = names(missing.values)),
    value = unlist(missing.values, use.names = FALSE)
  ) %>%
  # Identifiers and fully observed bookkeeping columns are not plotted.
  dplyr::filter(!variable %in% c("row", "hash_key", "dias_treat_imp_sin_na", "dup")) %>%
  dplyr::mutate(perc = value / nrow(CONS_C1_df_dup_SEP_2020_women)) %>%
  # Tooltip text is built while perc is still a proportion (0-1).
  dplyr::mutate(label_text = paste0("Variable= ", variable, "<br>n= ", value, "<br>", scales::percent(round(perc, 3)))) %>%
  # From here on perc is expressed on a 0-100 scale.
  dplyr::mutate(perc = perc * 100) %>%
  ggplot() +
  geom_bar(aes(x = factor(variable), y = perc, label = label_text), stat = "identity") +
  sjPlot::theme_sjplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 9)) +
  labs(
    x = NULL,
    y = "% of Missing Values",
    caption = paste0("Nota. Percentage of missing values (n= ", nrow(CONS_C1_df_dup_SEP_2020_women), ")")
  )

# Interactive version. The y values are already percentages (0-100), so a "%"
# suffix is appended to the ticks; the d3 "%" tick format would multiply the
# values by 100 a second time.
ggplotly(plot_miss, tooltip = c("label_text")) %>%
  layout(
    xaxis = list(showticklabels = TRUE),
    yaxis = list(ticksuffix = "%", range = c(0, 30)),
    height = 600,
    width = 800
  )
#</div>
From the figure above, we could see that Other frequent substances (otras_sus1_mod), the Number of children in residential treatment (num_hijos_trat_res_mod), the Age at first use of the principal substance, grouped (edad_ini_sus_prin_grupos), and the Evaluation of the therapeutic process (evaluacindelprocesoteraputico) had more than 10% of missing data. These values should be imputed.
#origen_ingreso #dg_global_nec_int_soc_or_1 "Diagnóstico global de necesidades de integración social" #evaluacindelprocesoteraputico "Evaluación del proceso terapéutico" #escolaridad_rec "macrozona"
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#HACER BASE ESPECIAL QUE CONTENGA UNA VARIABLE DE EDAD DE INICIO DE CONSUMO DE SUSTANCIA PRINCIPAL PARA EQUIPARAR
# Data set used for imputation: the ordinal variables are recoded as ordered
# factors and only the columns named in `vector_variables` are kept.
CONS_C1_df_dup_SEP_2020_women_miss <-
  CONS_C1_df_dup_SEP_2020_women %>%
  # (disabled) per-user row counter:
  # dplyr::group_by(hash_key) %>%
  # dplyr::mutate(rn=row_number()) %>%
  # dplyr::ungroup() %>%
  # Turn the ordinal variables into ordered factors.
  # (disabled) ordered recoding of schooling:
  # dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%
  dplyr::mutate(
    edad_al_ing_grupos = parse_factor(
      as.character(edad_al_ing_grupos),
      levels = c('<18', '18-29', '30-39', '40-49', '50+'),
      ordered = TRUE, trim_ws = FALSE, include_na = FALSE
    ),
    edad_ini_sus_prin_grupos = parse_factor(
      as.character(edad_ini_sus_prin_grupos),
      levels = c('<=15', '16-18', '19-24', '>=25'),
      ordered = TRUE, trim_ws = FALSE, include_na = FALSE
    ),
    # Levels run from least to most frequent use. '1 day a week or more' now
    # sits between 'Less than 1 day a week' and '2 to 3 days a week', matching
    # the hierarchy applied when the imputed frequency is chosen; it was
    # previously placed after '4 to 6 days a week', which broke the ordering.
    freq_cons_sus_prin = parse_factor(
      as.character(freq_cons_sus_prin),
      levels = c('Did not use', 'Less than 1 day a week', '1 day a week or more', '2 to 3 days a week', '4 to 6 days a week', 'Daily'),
      ordered = TRUE, trim_ws = FALSE, include_na = FALSE
    ),
    compromiso_biopsicosocial = parse_factor(
      as.character(compromiso_biopsicosocial),
      levels = c('1-Mild', '2-Moderate', '3-Severe'),
      ordered = TRUE, trim_ws = FALSE, include_na = FALSE
    )
  ) %>%
  # all_of() replaces the deprecated select_(.dots = ...); unique() guards
  # against the repeated entry in vector_variables.
  dplyr::select(dplyr::all_of(unique(vector_variables))) %>%
  data.table::data.table()
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(evaluacindelprocesoteraputico)
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(nombre_region)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
library(Amelia)
# Multiple imputation with Amelia: 61 completed data sets, computed in
# parallel, on every column except "duplicates_filtered".
# "row" is carried along as an identifier only (idvars); "hash_key" is passed
# as the cross-section variable and "dup" as the time variable.
#
# Warning raised by this call:
#   In amcheck(x = x, m = m, idvars = numopts$idvars, priors = priors, :
#   The number of categories in one of the variables marked nominal has greater
#   than 10 categories. Check nominal specification.
amelia_fit <- amelia(
  x = CONS_C1_df_dup_SEP_2020_women_miss[, -c("duplicates_filtered")],
  m = 61,
  idvars = "row",
  cs = "hash_key",
  ts = "dup",
  noms = c("estado_conyugal_2", "embarazo", "sus_principal_mod", "otras_sus1_mod", "tipo_centro", "tipo_de_plan_res", "tenencia_de_la_vivienda_mod", "tipo_de_programa_2", "motivodeegreso_mod_imp", "servicio_sal1"),
  ords = c("edad_al_ing_grupos", "freq_cons_sus_prin", "compromiso_biopsicosocial", "edad_ini_sus_prin_grupos"),
  parallel = "multicore",
  incheck = TRUE
)
# Original note: the health service was taken out because it carries too many
# categories (the nominal-variable warning quoted above).
# NOTE(review): "servicio_sal1" is still listed in `noms` — confirm which of
# the two is intended.
# Error recorded alongside that note:
# Error in yy %*% unique(na.omit(x.orig[, i])) : non-conformable arguments.
Age at Admission to Treatment (in groups)
We started by looking at the missing values in the age at admission (n= 4). Since no user with more than one treatment had a missing age at admission, we did not have to account for serial dependencies between treatment dates when imputing.
#On this graph, a y = x line indicates the line of perfect agreement; that is, if the imputation model was a perfect predictor of the true value, all the imputations would fall on this line
# Overimputation diagnostic for the age-at-admission group. It is switched off
# (no_mostrar is 0); set no_mostrar to 1 to run it.
no_mostrar <- 0
if (no_mostrar == 1) {
  res <- {
    # nn_K is not defined in this section — presumably set earlier; verify.
    setTimeLimit(nn_K * 500)
    ovr_imp_edad_ini_cons <- overimpute(amelia_fit, var = "edad_al_ing_grupos")
  }
}
# Number of users (hash_key) that have at least one row with a missing
# age-at-admission group and more than one row in the data set.
paste0(
  "Users that had more than one treatment with no date of admission: ",
  CONS_C1_df_dup_SEP_2020_women_miss %>%
    dplyr::group_by(hash_key) %>%
    dplyr::summarise(
      n = dplyr::n(),
      na_edad_ing = sum(is.na(edad_al_ing_grupos))
    ) %>%
    dplyr::filter(na_edad_ing > 0, n > 1) %>%
    nrow()
)
## [1] "Users that had more than one treatment with no date of admission: 0"
#Hay poca relación en las imputaciones.
#table(is.na(CONS_C1_df_dup_SEP_2020_women_not_miss$edad_al_ing),exclude=NULL)
# For every row, tally how often each age-at-admission group was proposed
# across the Amelia imputations and keep the strictly most frequent group.
# The imputations are read from the list itself, so the code follows whatever
# `m` was used in amelia() instead of a hand-written list of 61 columns.
edad_al_ing_grupos_imputed <-
  amelia_fit$imputations %>%
  # Long table: one (row id, proposed value) pair per row and imputation.
  lapply(function(imp) {
    data.frame(
      amelia_fit_imputations_imp1_row = imp[["row"]],
      value = as.character(imp[["edad_al_ing_grupos"]]),
      stringsAsFactors = FALSE
    )
  }) %>%
  dplyr::bind_rows() %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(
    # "<18" is a valid level of the variable but was never counted, so a row
    # whose modal imputation was "<18" received another group (or NA).
    edad_menor_18 = sum(value == "<18", na.rm = TRUE),
    edad_18_29 = sum(value == "18-29", na.rm = TRUE),
    edad_30_39 = sum(value == "30-39", na.rm = TRUE),
    edad_40_49 = sum(value == "40-49", na.rm = TRUE),
    edad_50mas = sum(value == "50+", na.rm = TRUE)
  ) %>%
  dplyr::ungroup() %>%
  # Number of distinct groups proposed at least once for the row.
  dplyr::mutate(ties = base::rowSums(dplyr::select(., starts_with("edad")) > 0)) %>%
  # Strict majority only: when the top count is shared, the result stays NA.
  dplyr::mutate(edad_al_ing_grupos_imp = dplyr::case_when(
    (edad_menor_18 > edad_18_29) & (edad_menor_18 > edad_30_39) & (edad_menor_18 > edad_40_49) & (edad_menor_18 > edad_50mas) ~ "<18",
    (edad_18_29 > edad_menor_18) & (edad_18_29 > edad_30_39) & (edad_18_29 > edad_40_49) & (edad_18_29 > edad_50mas) ~ "18-29",
    (edad_30_39 > edad_menor_18) & (edad_30_39 > edad_18_29) & (edad_30_39 > edad_40_49) & (edad_30_39 > edad_50mas) ~ "30-39",
    (edad_40_49 > edad_menor_18) & (edad_40_49 > edad_18_29) & (edad_40_49 > edad_30_39) & (edad_40_49 > edad_50mas) ~ "40-49",
    (edad_50mas > edad_menor_18) & (edad_50mas > edad_18_29) & (edad_50mas > edad_30_39) & (edad_50mas > edad_40_49) ~ "50+"
  )) %>%
  # Dropped so the result keeps the columns the replacement step expects.
  dplyr::select(-edad_menor_18)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
##
# Reemplazo los valores perdidos:
CONS_C1_df_dup_SEP_2020_women_miss0 <-
  dplyr::left_join(
    CONS_C1_df_dup_SEP_2020_women_miss,
    edad_al_ing_grupos_imputed,
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  # Keep the observed age-at-admission group; fall back to the imputed group
  # only where the observed one is missing. The column becomes character.
  dplyr::mutate(
    edad_al_ing_grupos = dplyr::coalesce(
      as.character(edad_al_ing_grupos),
      edad_al_ing_grupos_imp
    )
  ) %>%
  # Remove the helper columns brought in by the join.
  dplyr::select(-c(edad_18_29, edad_30_39, edad_40_49, edad_50mas, ties, edad_al_ing_grupos_imp))
After the imputation, there were no missing cases left.
Primary or main substance
Then we imputed the primary/main substance at admission (n= 1).
# Ver distintos valores propuestos para sustancia de inciio
# For every row, tally how often each primary substance was proposed across
# the Amelia imputations and keep the strictly most frequent one.
# The imputations are read from the list itself, so the code follows whatever
# `m` was used in amelia() instead of a hand-written list of 61 columns.
sus_principal_mod_imputed <-
  amelia_fit$imputations %>%
  # Long table: one (row id, proposed value) pair per row and imputation.
  lapply(function(imp) {
    data.frame(
      amelia_fit_imputations_imp1_row = imp[["row"]],
      value = as.character(imp[["sus_principal_mod"]]),
      stringsAsFactors = FALSE
    )
  }) %>%
  dplyr::bind_rows() %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(
    sus_prin_mar = sum(value == "Marijuana", na.rm = TRUE),
    sus_prin_oh = sum(value == "Alcohol", na.rm = TRUE),
    sus_prin_pb = sum(value == "Cocaine paste", na.rm = TRUE),
    sus_prin_coc = sum(value == "Cocaine hydrochloride", na.rm = TRUE),
    sus_prin_other = sum(value == "Other", na.rm = TRUE)
  ) %>%
  dplyr::ungroup() %>%
  # Number of distinct substances proposed at least once for the row.
  dplyr::mutate(ties = base::rowSums(dplyr::select(., starts_with("sus_prin_")) > 0)) %>%
  # Strict majority only: when the top count is shared, the result stays NA.
  dplyr::mutate(sus_principal_mod_imp = dplyr::case_when(
    (sus_prin_mar > sus_prin_oh) & (sus_prin_mar > sus_prin_pb) & (sus_prin_mar > sus_prin_coc) & (sus_prin_mar > sus_prin_other) ~ "Marijuana",
    (sus_prin_oh > sus_prin_mar) & (sus_prin_oh > sus_prin_pb) & (sus_prin_oh > sus_prin_coc) & (sus_prin_oh > sus_prin_other) ~ "Alcohol",
    (sus_prin_pb > sus_prin_mar) & (sus_prin_pb > sus_prin_oh) & (sus_prin_pb > sus_prin_coc) & (sus_prin_pb > sus_prin_other) ~ "Cocaine paste",
    (sus_prin_coc > sus_prin_mar) & (sus_prin_coc > sus_prin_oh) & (sus_prin_coc > sus_prin_pb) & (sus_prin_coc > sus_prin_other) ~ "Cocaine hydrochloride",
    # A majority of "Other" was previously labelled "Cocaine hydrochloride".
    (sus_prin_other > sus_prin_mar) & (sus_prin_other > sus_prin_oh) & (sus_prin_other > sus_prin_pb) & (sus_prin_other > sus_prin_coc) ~ "Other"
  ))
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss1 <-
  dplyr::left_join(
    CONS_C1_df_dup_SEP_2020_women_miss0,
    sus_principal_mod_imputed,
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  # Keep the observed primary substance; use the imputed one only where the
  # observed value is missing. The result is stored as a factor again.
  dplyr::mutate(
    sus_principal_mod = factor(
      dplyr::coalesce(
        as.character(sus_principal_mod),
        as.character(sus_principal_mod_imp)
      )
    )
  ) %>%
  # Remove the helper columns brought in by the join.
  dplyr::select(-sus_prin_mar, -sus_prin_oh, -sus_prin_pb, -sus_prin_coc, -sus_prin_other, -ties, -sus_principal_mod_imp) %>%
  data.table()
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
After the imputation, no missing values remained.
Age at first use of primary substance (in groups)
Another variable worth imputing is the Age at first use of principal substance (n= 3,189).
Based on the figure above, the Age at first use of principal substance was similar between the imputed values and the observed. However, we followed the rules stated in Duplicates4 process (link). There was a logical condition to fulfill in order to replace adequately these values in the database: the age of onset of drug use in the primary substance at admission may not be greater than the age of admission to treatment. Then, we selected the minimum value of age of onset of drug use among the imputed, because one user could not have more than one age of onset of drug use.
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Imputed group of age at first use of the primary substance.
# Votes are tallied per row across the Amelia imputations, candidates that
# contradict the age at admission are discarded, the votes are pooled per user
# and primary substance, and the strictly most voted group is kept.
# The imputations are read from the list itself, so the code follows whatever
# `m` was used in amelia() instead of a hand-written list of 61 columns.
edad_ini_sus_prin_grupos_imputed <-
  amelia_fit$imputations %>%
  # Long table: one (row id, proposed value) pair per row and imputation.
  lapply(function(imp) {
    data.frame(
      amelia_fit_imputations_imp1_row = imp[["row"]],
      value = as.character(imp[["edad_ini_sus_prin_grupos"]]),
      stringsAsFactors = FALSE
    )
  }) %>%
  dplyr::bind_rows() %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(
    eipm_menor_igual_15 = sum(value == "<=15", na.rm = TRUE),
    eipm_16_18 = sum(value == "16-18", na.rm = TRUE),
    eipm_19_24 = sum(value == "19-24", na.rm = TRUE),
    eipm_mas_igual_25 = sum(value == ">=25", na.rm = TRUE)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::left_join(
    CONS_C1_df_dup_SEP_2020_women_miss1[, c("row", "hash_key", "edad_al_ing_grupos", "sus_principal_mod")],
    by = c("amelia_fit_imputations_imp1_row" = "row")
  ) %>%
  # 1) Candidate onset groups above the age at admission are not valid: for
  #    admissions under 18, the votes for 19-24 and >=25 are set to zero.
  dplyr::mutate(eipm_mas_igual_25 = dplyr::case_when(edad_al_ing_grupos == "<18" ~ 0, TRUE ~ as.numeric(eipm_mas_igual_25))) %>%
  dplyr::mutate(eipm_19_24 = dplyr::case_when(edad_al_ing_grupos == "<18" ~ 0, TRUE ~ as.numeric(eipm_19_24))) %>%
  # 1.2) The same restriction is applied when any record of the same user and
  #      primary substance was admitted under 18 (lowest admission group = 1).
  dplyr::mutate(edad_ing_num = dplyr::case_when(
    edad_al_ing_grupos == "<18" ~ 1,
    edad_al_ing_grupos == "18-29" ~ 2,
    edad_al_ing_grupos == "30-39" ~ 3,
    edad_al_ing_grupos == "40-49" ~ 4,
    edad_al_ing_grupos == "50+" ~ 5
  )) %>%
  dplyr::mutate(hash_sus_prin_mod = paste0(hash_key, "_", sus_principal_mod)) %>%
  dplyr::select(-c(sus_principal_mod)) %>%
  dplyr::group_by(hash_sus_prin_mod) %>%
  dplyr::mutate(edad_ing_num_min = min(edad_ing_num)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(eipm_mas_igual_25 = dplyr::case_when(edad_ing_num_min == 1 ~ 0, TRUE ~ as.numeric(eipm_mas_igual_25))) %>%
  dplyr::mutate(eipm_19_24 = dplyr::case_when(edad_ing_num_min == 1 ~ 0, TRUE ~ as.numeric(eipm_19_24))) %>%
  # 2) Pool the votes within user and primary substance so that every record
  #    of that pair receives the same imputed value.
  dplyr::group_by(hash_sus_prin_mod) %>%
  dplyr::mutate(
    sum_eipm_menor_igual_15 = sum(eipm_menor_igual_15),
    sum_eipm_16_18 = sum(eipm_16_18),
    sum_eipm_19_24 = sum(eipm_19_24),
    sum_eipm_mas_igual_25 = sum(eipm_mas_igual_25)
  ) %>%
  dplyr::ungroup() %>%
  # Number of groups with at least one pooled vote. The selector previously
  # used ("edad_ini_sus_prin_") matched no column of this table.
  dplyr::mutate(ties = base::rowSums(dplyr::select(., starts_with("sum_eipm_")) > 0)) %>%
  # Strict majority only: when the top count is shared, the result stays NA.
  dplyr::mutate(edad_ini_sus_prin_grupos_imp = dplyr::case_when(
    (sum_eipm_menor_igual_15 > sum_eipm_16_18) & (sum_eipm_menor_igual_15 > sum_eipm_19_24) & (sum_eipm_menor_igual_15 > sum_eipm_mas_igual_25) ~ "<=15",
    (sum_eipm_16_18 > sum_eipm_menor_igual_15) & (sum_eipm_16_18 > sum_eipm_19_24) & (sum_eipm_16_18 > sum_eipm_mas_igual_25) ~ "16-18",
    (sum_eipm_19_24 > sum_eipm_menor_igual_15) & (sum_eipm_19_24 > sum_eipm_16_18) & (sum_eipm_19_24 > sum_eipm_mas_igual_25) ~ "19-24",
    (sum_eipm_mas_igual_25 > sum_eipm_menor_igual_15) & (sum_eipm_mas_igual_25 > sum_eipm_16_18) & (sum_eipm_mas_igual_25 > sum_eipm_19_24) ~ ">=25"
  )) %>%
  # 3) Flag the rows left without a value (ties); they are only marked here,
  #    not resolved.
  dplyr::mutate(ties2 = ifelse(is.na(edad_ini_sus_prin_grupos_imp), 1, 0))
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss2 <-
  dplyr::left_join(
    CONS_C1_df_dup_SEP_2020_women_miss1,
    edad_ini_sus_prin_grupos_imputed[, c("amelia_fit_imputations_imp1_row", "edad_ini_sus_prin_grupos_imp")],
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  # Keep the observed age-of-onset group; use the imputed group only where the
  # observed one is missing. The column becomes character.
  dplyr::mutate(
    edad_ini_sus_prin_grupos = dplyr::coalesce(
      as.character(edad_ini_sus_prin_grupos),
      edad_ini_sus_prin_grupos_imp
    )
  ) %>%
  dplyr::select(-edad_ini_sus_prin_grupos_imp)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#is.na(edad_ini_cons) & is.na(edad_ini_sus_prin) & is.na(min_edad_al_ing)~as.numeric(avg),
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss1$edad_ini_cons))
# Rows whose age-of-onset group is still missing after the replacement step.
paste0(
  "Number of rows with values that did not fulfilled the conditions: ",
  sum(is.na(CONS_C1_df_dup_SEP_2020_women_miss2[["edad_ini_sus_prin_grupos"]]))
)
## [1] "Number of rows with values that did not fulfilled the conditions: 139"
#Lo importante es tener en cuenta que las imputaciones se hicieron por filas; no, en cambio, ahora debemos reemplazar aquellos casos que tienen perdidos (no cumplieron con las condiciones) con el valor mínimo
After the imputations, 139 cases of age of onset of use of the primary substance did not fulfill the conditions necessary to replace the missing values with the imputed ones, possibly due to ties in the candidate values.
Frequency of Use of the Primary Substance at Admission
Another variable worth imputing is the Frequency of use of the primary drug at admission (n= 169). In case of ties, we selected the imputed value corresponding to the most frequent drug use.
# Ver distintos valores propuestos para sustancia de inciio
freq_cons_sus_prin_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$freq_cons_sus_prin,
amelia_fit$imputations$imp2$freq_cons_sus_prin,
amelia_fit$imputations$imp3$freq_cons_sus_prin,
amelia_fit$imputations$imp4$freq_cons_sus_prin,
amelia_fit$imputations$imp5$freq_cons_sus_prin,
amelia_fit$imputations$imp6$freq_cons_sus_prin,
amelia_fit$imputations$imp7$freq_cons_sus_prin,
amelia_fit$imputations$imp8$freq_cons_sus_prin,
amelia_fit$imputations$imp9$freq_cons_sus_prin,
amelia_fit$imputations$imp10$freq_cons_sus_prin,
amelia_fit$imputations$imp11$freq_cons_sus_prin,
amelia_fit$imputations$imp12$freq_cons_sus_prin,
amelia_fit$imputations$imp13$freq_cons_sus_prin,
amelia_fit$imputations$imp14$freq_cons_sus_prin,
amelia_fit$imputations$imp15$freq_cons_sus_prin,
amelia_fit$imputations$imp16$freq_cons_sus_prin,
amelia_fit$imputations$imp17$freq_cons_sus_prin,
amelia_fit$imputations$imp18$freq_cons_sus_prin,
amelia_fit$imputations$imp19$freq_cons_sus_prin,
amelia_fit$imputations$imp20$freq_cons_sus_prin,
amelia_fit$imputations$imp21$freq_cons_sus_prin,
amelia_fit$imputations$imp22$freq_cons_sus_prin,
amelia_fit$imputations$imp23$freq_cons_sus_prin,
amelia_fit$imputations$imp24$freq_cons_sus_prin,
amelia_fit$imputations$imp25$freq_cons_sus_prin,
amelia_fit$imputations$imp26$freq_cons_sus_prin,
amelia_fit$imputations$imp27$freq_cons_sus_prin,
amelia_fit$imputations$imp28$freq_cons_sus_prin,
amelia_fit$imputations$imp29$freq_cons_sus_prin,
amelia_fit$imputations$imp30$freq_cons_sus_prin,
amelia_fit$imputations$imp31$freq_cons_sus_prin,
amelia_fit$imputations$imp32$freq_cons_sus_prin,
amelia_fit$imputations$imp33$freq_cons_sus_prin,
amelia_fit$imputations$imp34$freq_cons_sus_prin,
amelia_fit$imputations$imp35$freq_cons_sus_prin,
amelia_fit$imputations$imp36$freq_cons_sus_prin,
amelia_fit$imputations$imp37$freq_cons_sus_prin,
amelia_fit$imputations$imp38$freq_cons_sus_prin,
amelia_fit$imputations$imp39$freq_cons_sus_prin,
amelia_fit$imputations$imp40$freq_cons_sus_prin,
amelia_fit$imputations$imp41$freq_cons_sus_prin,
amelia_fit$imputations$imp42$freq_cons_sus_prin,
amelia_fit$imputations$imp43$freq_cons_sus_prin,
amelia_fit$imputations$imp44$freq_cons_sus_prin,
amelia_fit$imputations$imp45$freq_cons_sus_prin,
amelia_fit$imputations$imp46$freq_cons_sus_prin,
amelia_fit$imputations$imp47$freq_cons_sus_prin,
amelia_fit$imputations$imp48$freq_cons_sus_prin,
amelia_fit$imputations$imp49$freq_cons_sus_prin,
amelia_fit$imputations$imp50$freq_cons_sus_prin,
amelia_fit$imputations$imp51$freq_cons_sus_prin,
amelia_fit$imputations$imp52$freq_cons_sus_prin,
amelia_fit$imputations$imp53$freq_cons_sus_prin,
amelia_fit$imputations$imp54$freq_cons_sus_prin,
amelia_fit$imputations$imp55$freq_cons_sus_prin,
amelia_fit$imputations$imp56$freq_cons_sus_prin,
amelia_fit$imputations$imp57$freq_cons_sus_prin,
amelia_fit$imputations$imp58$freq_cons_sus_prin,
amelia_fit$imputations$imp59$freq_cons_sus_prin,
amelia_fit$imputations$imp60$freq_cons_sus_prin,
amelia_fit$imputations$imp61$freq_cons_sus_prin
)
freq_cons_sus_prin_imputed<-
freq_cons_sus_prin_imputed %>%
data.frame() %>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("1 day a week or more",as.character(.))~1,TRUE~0), .names="1_day_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("2 to 3 days a week",as.character(.))~1,TRUE~0), .names="2_3_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("4 to 6 days a week",as.character(.))~1,TRUE~0), .names="4_6_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Less than 1 day a week",as.character(.))~1,TRUE~0), .names="less_1_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Did not use",as.character(.))~1,TRUE~0), .names="did_not_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Daily",as.character(.))~1,TRUE~0), .names="daily_{col}"))%>%
dplyr::mutate(freq_cons_sus_prin_daily = base::rowSums(dplyr::select(., starts_with("daily_")))) %>%
dplyr::mutate(freq_cons_sus_prin_4_6 = base::rowSums(dplyr::select(., starts_with("4_6_"))))%>%
dplyr::mutate(freq_cons_sus_prin_2_3 = base::rowSums(dplyr::select(., starts_with("2_3_"))))%>%
dplyr::mutate(freq_cons_sus_prin_1_day = base::rowSums(dplyr::select(., starts_with("1_day_"))))%>%
dplyr::mutate(freq_cons_sus_prin_less_1 = base::rowSums(dplyr::select(., starts_with("less_1_"))))%>%
dplyr::mutate(freq_cons_sus_prin_did_not = base::rowSums(dplyr::select(., starts_with("did_not_")))) %>%
#dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_1_day>0~1,TRUE~0)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_2_3>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_4_6>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_less_1>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_did_not>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_daily>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
#hierarchy
dplyr::mutate(freq_cons_sus_prin_to_imputation=
dplyr::case_when(freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_daily>0~"Daily",
freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_did_not>0~"Did not use",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_daily>0~"Daily",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_did_not>0~"Did not use")) %>%
janitor::clean_names()
# Keep only the join key and the resolved frequency-of-use value.
freq_cons_sus_prin_imputed <-
  freq_cons_sus_prin_imputed %>%
  dplyr::select(
    amelia_fit_imputations_imp1_row,
    freq_cons_sus_prin_to_imputation
  )
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
# Fill missing frequency of use with the resolved imputed value.
# Observed (non-missing) values are kept untouched.
CONS_C1_df_dup_SEP_2020_women_miss3 <-
  CONS_C1_df_dup_SEP_2020_women_miss2 %>%
  dplyr::left_join(
    freq_cons_sus_prin_imputed,
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    freq_cons_sus_prin = factor(
      dplyr::coalesce(
        as.character(freq_cons_sus_prin),
        as.character(freq_cons_sus_prin_to_imputation)
      )
    )
  ) %>%
  dplyr::select(-freq_cons_sus_prin_to_imputation) %>%
  data.table()
After imputation, no missing values remained in this variable.
Health service
Another variable worth imputing is the health service (n= 7). Because the large number of health services (i.e., of possible categories) produced many candidate values, we imputed the health service that corresponds to the commune of residence. For observations living in the commune of Santiago, we kept the nearest health service among the candidates (Metropolitano Norte).
#
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#::#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#::#:#:#:
# Candidate health service for every row whose original `servicio_de_salud`
# is missing.
#
# Steps:
#   1. Stack `servicio_sal1` from the 61 imputed data sets (imp1..imp61) in
#      long form, keyed by the row id of the first imputation.
#   2. Count how often each health service was proposed per row and spread
#      the counts to one column per health service (0 when never proposed).
#   3. Keep only rows whose original health service is missing.
#   4. Multiply by 100 the count of the service that matches the commune of
#      residence, so that it wins over any other candidate.
#   5. Keep the candidate(s) with the highest score per row.
#
# Result columns: `amelia_fit_imputations_imp1_row`, `comuna_residencia_cod`,
# `variable` (health service name) and `value` (score). Ties are kept, so a
# row may appear more than once.
servicio_sal1_imputed <-
  lapply(
    paste0("imp", seq_len(61L)),
    function(imp_id) {
      data.frame(
        amelia_fit_imputations_imp1_row = amelia_fit$imputations$imp1$row,
        value = amelia_fit$imputations[[imp_id]]$servicio_sal1,
        stringsAsFactors = FALSE
      )
    }
  ) %>%
  dplyr::bind_rows() %>%
  dplyr::count(amelia_fit_imputations_imp1_row, value) %>%
  tidyr::pivot_wider(
    id_cols = "amelia_fit_imputations_imp1_row",
    names_from = "value",
    values_from = "n",
    values_fill = 0
  ) %>%
  dplyr::left_join(
    CONS_C1_df_dup_SEP_2020[, c("row", "comuna_residencia_cod", "servicio_de_salud")],
    by = c("amelia_fit_imputations_imp1_row" = "row")
  ) %>%
  # only rows that actually need an imputed health service
  dplyr::filter(is.na(servicio_de_salud)) %>%
  dplyr::select(-servicio_de_salud) %>%
  melt(id.vars = c("amelia_fit_imputations_imp1_row", "comuna_residencia_cod")) %>%
  dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
  # boost the health service that corresponds to the commune of residence
  dplyr::mutate(
    value = dplyr::case_when(
      grepl("ARICA", comuna_residencia_cod) & variable == "Arica" ~ value * 100,
      grepl("SANT", comuna_residencia_cod) & grepl("Metrop", variable) &
        grepl("Norte", variable) ~ value * 100,
      grepl("NOGAL", comuna_residencia_cod) & grepl("Quillota", variable) ~ value * 100,
      grepl("PUYE", comuna_residencia_cod) & grepl("Osorno", variable) ~ value * 100,
      TRUE ~ as.numeric(value)
    )
  ) %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::slice_max(value) %>%
  dplyr::ungroup()
# Check that the imputation left exactly one candidate per row: prints
# "problems with imputation" if any row id appears more than once.
if (any(duplicated(servicio_sal1_imputed$amelia_fit_imputations_imp1_row))) {
  "problems with imputation"
} else {
  ""
}
## [1] ""
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#::#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#::#:#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
# Fill missing health service with the selected candidate.
# In `servicio_sal1_imputed` the health service NAME is stored in `variable`;
# `value` only holds the (weighted) number of imputations that proposed it,
# so the name, not the count, is what must be written into `servicio_sal1`.
CONS_C1_df_dup_SEP_2020_women_miss4 <-
  CONS_C1_df_dup_SEP_2020_women_miss3 %>%
  dplyr::left_join(
    servicio_sal1_imputed %>%
      dplyr::ungroup() %>%
      dplyr::transmute(
        amelia_fit_imputations_imp1_row,
        servicio_sal1_imp = as.character(variable)
      ),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    servicio_sal1 = factor(
      dplyr::coalesce(as.character(servicio_sal1), servicio_sal1_imp)
    )
  ) %>%
  dplyr::select(-servicio_sal1_imp) %>%
  data.table()
We ended up with 0 missing values in health service.
Marital status
Additionally, we replaced missing values of marital status (n=44). Since no marital status was clearly more vulnerable than the others, we selected the most frequent imputed value across the imputed databases. Only when candidate values were tied did we resolve the tie by discarding the “married” status, which could be somewhat less vulnerable than the other categories.
# Collect the values proposed for marital status (`estado_conyugal_2`) by the
# 61 imputed data sets, one column per imputation, next to the row id taken
# from the first imputation.
# Column names reproduce the ones a hand-written `cbind.data.frame()` call
# would generate ("amelia_fit$imputations$impK$estado_conyugal_2"), because
# later steps select columns by those names.
estado_conyugal_2_imputed <- local({
  imp_ids <- paste0("imp", seq_len(61L))
  candidate_cols <- stats::setNames(
    lapply(
      imp_ids,
      function(imp_id) amelia_fit$imputations[[imp_id]]$estado_conyugal_2
    ),
    paste0("amelia_fit$imputations$", imp_ids, "$estado_conyugal_2")
  )
  do.call(
    cbind.data.frame,
    c(
      list(`amelia_fit$imputations$imp1$row` = amelia_fit$imputations$imp1$row),
      candidate_cols
    )
  )
})
# Count, per row, how many imputed data sets proposed each marital status.
#
# `data.frame()` turns the "$" in the column names into ".", which is why the
# ranges below use dotted names. The ranges span ALL 61 imputations
# (imp1..imp61): stopping at imp30 would silently ignore 31 of the 61
# candidate values in the vote.
#
# Adds:
#   * one 0/1 indicator column per imputation and category
#     (prefixes married_, sep_div_, singl_, widow_);
#   * estado_conyugal_2_married / _sep_div / _singl / _wid: votes per category;
#   * estado_conyugal_2_tot: number of distinct categories with >= 1 vote.
estado_conyugal_2_imputed <-
  estado_conyugal_2_imputed %>%
  data.frame() %>%
  dplyr::mutate(dplyr::across(
    c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp61.estado_conyugal_2),
    ~ as.numeric(grepl("Married/Shared living arrangements", as.character(.x))),
    .names = "married_{col}"
  )) %>%
  dplyr::mutate(dplyr::across(
    c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp61.estado_conyugal_2),
    ~ as.numeric(grepl("Separated/Divorced", as.character(.x))),
    .names = "sep_div_{col}"
  )) %>%
  dplyr::mutate(dplyr::across(
    c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp61.estado_conyugal_2),
    ~ as.numeric(grepl("Single", as.character(.x))),
    .names = "singl_{col}"
  )) %>%
  dplyr::mutate(dplyr::across(
    c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp61.estado_conyugal_2),
    ~ as.numeric(grepl("Widower", as.character(.x))),
    .names = "widow_{col}"
  )) %>%
  # votes per category (`.` is the data frame entering this step)
  dplyr::mutate(
    estado_conyugal_2_married = base::rowSums(dplyr::select(., dplyr::starts_with("married_"))),
    estado_conyugal_2_sep_div = base::rowSums(dplyr::select(., dplyr::starts_with("sep_div_"))),
    estado_conyugal_2_singl = base::rowSums(dplyr::select(., dplyr::starts_with("singl_"))),
    estado_conyugal_2_wid = base::rowSums(dplyr::select(., dplyr::starts_with("widow_")))
  ) %>%
  # number of different categories that received at least one vote
  dplyr::mutate(
    estado_conyugal_2_tot = as.numeric(
      (estado_conyugal_2_married > 0) +
        (estado_conyugal_2_sep_div > 0) +
        (estado_conyugal_2_singl > 0) +
        (estado_conyugal_2_wid > 0)
    )
  ) %>%
  janitor::clean_names()
# Most voted marital status per row.
# When several categories share the highest count (n_row > 1) the category is
# set to NA, so the tie can be handled in a later step; one line per row is
# kept.
estado_conyugal_2_imputed_cat_est_cony <-
  estado_conyugal_2_imputed %>%
  tidyr::pivot_longer(
    c(
      estado_conyugal_2_married,
      estado_conyugal_2_sep_div,
      estado_conyugal_2_singl,
      estado_conyugal_2_wid
    ),
    names_to = "cat_est_conyugal",
    values_to = "count"
  ) %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::filter(count == max(count, na.rm = TRUE)) %>%
  dplyr::mutate(n_row = dplyr::n()) %>%
  dplyr::ungroup() %>%
  dplyr::transmute(
    amelia_fit_imputations_imp1_row,
    cat_est_conyugal = dplyr::if_else(n_row > 1, NA_character_, cat_est_conyugal),
    count,
    n_row
  ) %>%
  dplyr::distinct(amelia_fit_imputations_imp1_row, .keep_all = TRUE)
# Attach the winning category to every row and translate the internal column
# name into the marital status label; tied rows (NA) stay NA.
estado_conyugal_2_imputed <-
  estado_conyugal_2_imputed %>%
  dplyr::left_join(
    estado_conyugal_2_imputed_cat_est_cony,
    by = "amelia_fit_imputations_imp1_row"
  ) %>%
  dplyr::mutate(
    cat_est_conyugal = dplyr::recode(
      cat_est_conyugal,
      "estado_conyugal_2_married" = "Married/Shared living arrangements",
      "estado_conyugal_2_sep_div" = "Separated/Divorced",
      "estado_conyugal_2_singl" = "Single",
      "estado_conyugal_2_wid" = "Widower",
      .default = NA_character_
    )
  ) %>%
  janitor::clean_names()
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
# First round: fill missing marital status with the most voted category.
# Rows whose vote ended in a tie keep a missing value for now.
CONS_C1_df_dup_SEP_2020_women_miss5_prev <-
  CONS_C1_df_dup_SEP_2020_women_miss4 %>%
  dplyr::left_join(
    estado_conyugal_2_imputed %>%
      dplyr::select(amelia_fit_imputations_imp1_row, cat_est_conyugal),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    estado_conyugal_2 = factor(
      dplyr::coalesce(
        as.character(estado_conyugal_2),
        as.character(cat_est_conyugal)
      )
    )
  ) %>%
  dplyr::select(-cat_est_conyugal) %>%
  data.table()
# problematic marital-status cases: c(59664, 17582, 161721, 36520)
# Row ids whose marital status is still missing after the first round.
no_calzaron_estado_cony <-
  unlist(
    dplyr::distinct(
      dplyr::filter(
        CONS_C1_df_dup_SEP_2020_women_miss5_prev,
        is.na(estado_conyugal_2)
      ),
      row
    )
  )
# Tie-break candidates for rows whose marital status is still missing.
#
# Only the four vote-count columns are reshaped. Melting them together with
# the character column `cat_est_conyugal` would turn every count into text
# (and back, with coercion warnings), and `estado_conyugal_2_tot` is not a
# category and must not compete in `slice_max()`.
#
# Per row, the most voted categories are kept (ties included) and "married"
# is then discarded, which is the tie-break rule.
# Result columns: `amelia_fit_imputations_imp1_row`, `variable` (vote-count
# column name of the category) and `value` (number of votes).
estado_conyugal_2_imputed2 <-
  estado_conyugal_2_imputed %>%
  dplyr::filter(amelia_fit_imputations_imp1_row %in% no_calzaron_estado_cony) %>%
  dplyr::select(
    amelia_fit_imputations_imp1_row,
    estado_conyugal_2_married,
    estado_conyugal_2_sep_div,
    estado_conyugal_2_singl,
    estado_conyugal_2_wid
  ) %>%
  melt(id.vars = "amelia_fit_imputations_imp1_row") %>%
  dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::slice_max(value, with_ties = TRUE) %>%
  dplyr::filter(variable != "estado_conyugal_2_married") %>%
  dplyr::ungroup()
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#2nd round of imputation for ties
# Second round: fill the marital status of tied rows with the category that
# survived the tie-break.
#
# In `estado_conyugal_2_imputed2` the category is identified by `variable`;
# `value` is only the number of votes, so it must not be written into
# `estado_conyugal_2`. Rows that still have more than one candidate are left
# missing instead of being duplicated by the join.
CONS_C1_df_dup_SEP_2020_women_miss5 <-
  CONS_C1_df_dup_SEP_2020_women_miss5_prev %>%
  dplyr::left_join(
    estado_conyugal_2_imputed2 %>%
      dplyr::ungroup() %>%
      dplyr::mutate(
        estado_conyugal_2_tiebreak = dplyr::case_when(
          variable == "estado_conyugal_2_married" ~ "Married/Shared living arrangements",
          variable == "estado_conyugal_2_sep_div" ~ "Separated/Divorced",
          variable == "estado_conyugal_2_singl" ~ "Single",
          variable == "estado_conyugal_2_wid" ~ "Widower"
        )
      ) %>%
      dplyr::filter(!is.na(estado_conyugal_2_tiebreak)) %>%
      dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
      # keep only rows with a single remaining candidate
      dplyr::filter(dplyr::n() == 1) %>%
      dplyr::ungroup() %>%
      dplyr::select(amelia_fit_imputations_imp1_row, estado_conyugal_2_tiebreak),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    estado_conyugal_2 = factor(
      dplyr::coalesce(as.character(estado_conyugal_2), estado_conyugal_2_tiebreak)
    )
  ) %>%
  dplyr::select(-estado_conyugal_2_tiebreak) %>%
  data.table()
#CONS_C1_df_dup_SEP_2020_women_miss5 %>%
#dplyr::filter(hash_key %in% CONS_C1_df_dup_SEP_2020_women_miss5 %>% dplyr::filter(is.na(estado_conyugal_2)) %>% dplyr::distinct(hash_key) %>% unlist())
We could not resolve Marital status in 0 cases due to ties in the most frequent values.
Type of Center
We examined the candidate imputations for the type of center (public or private) (n=7).
# Majority vote for the type of center across the 61 imputed data sets.
#
# The `tipo_centro` value of every imputation (imp1..imp61) is stacked in
# long form, keyed by the row id of the first imputation. Per row, the number
# of "Public" and "Private" proposals is counted and the category proposed by
# at least 31 of the 61 imputations (a strict majority) is kept in
# `tipo_centro_imp`; it is NA when neither reaches 31.
tipo_centro_imputed <-
  lapply(
    paste0("imp", seq_len(61L)),
    function(imp_id) {
      data.frame(
        amelia_fit_imputations_imp1_row = amelia_fit$imputations$imp1$row,
        value = amelia_fit$imputations[[imp_id]]$tipo_centro,
        stringsAsFactors = FALSE
      )
    }
  ) %>%
  dplyr::bind_rows() %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(
    public = sum(value == "Public"),
    private = sum(value == "Private")
  ) %>%
  dplyr::mutate(
    tipo_centro_imp = dplyr::case_when(
      public >= 31 ~ "Public",
      private >= 31 ~ "Private"
    )
  )
## `summarise()` ungrouping output (override with `.groups` argument)
#dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
# dplyr::mutate(n=n()) %>%
# dplyr::ungroup() %>%
# dplyr::filter(n>1)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
# Fill missing type of center with the majority-vote value.
CONS_C1_df_dup_SEP_2020_women_miss6 <-
  CONS_C1_df_dup_SEP_2020_women_miss5 %>%
  dplyr::left_join(
    tipo_centro_imputed %>%
      dplyr::select(amelia_fit_imputations_imp1_row, tipo_centro_imp),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    tipo_centro = factor(
      dplyr::coalesce(as.character(tipo_centro), tipo_centro_imp)
    )
  ) %>%
  dplyr::select(-c(tipo_centro_imp)) %>%
  data.table()
After imputation, no missing values remained in this variable.
Pregnant at admission
We examined the candidate imputations for pregnancy status at admission (n=157).
# Majority vote for pregnancy at admission across the 61 imputed data sets.
#
# The `embarazo` value of every imputation (imp1..imp61) is stacked in long
# form, keyed by the row id of the first imputation. Per row, the number of
# "TRUE" and "FALSE" proposals is counted and the value proposed by at least
# 31 of the 61 imputations (a strict majority) is kept in `embarazo_imp`;
# it is NA when neither reaches 31.
embarazo_imputed <-
  lapply(
    paste0("imp", seq_len(61L)),
    function(imp_id) {
      data.frame(
        amelia_fit_imputations_imp1_row = amelia_fit$imputations$imp1$row,
        value = amelia_fit$imputations[[imp_id]]$embarazo,
        stringsAsFactors = FALSE
      )
    }
  ) %>%
  dplyr::bind_rows() %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(
    pregnancy = sum(value == "TRUE"),
    not_pregnancy = sum(value == "FALSE")
  ) %>%
  dplyr::mutate(
    embarazo_imp = dplyr::case_when(
      pregnancy >= 31 ~ TRUE,
      not_pregnancy >= 31 ~ FALSE
    )
  )
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
# Fill missing pregnancy status with the majority-vote value.
CONS_C1_df_dup_SEP_2020_women_miss7 <-
  CONS_C1_df_dup_SEP_2020_women_miss6 %>%
  dplyr::left_join(
    embarazo_imputed %>%
      dplyr::select(amelia_fit_imputations_imp1_row, embarazo_imp),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    embarazo = factor(
      dplyr::coalesce(as.logical(embarazo), as.logical(embarazo_imp))
    )
  ) %>%
  dplyr::select(-embarazo_imp) %>%
  data.table()
#CONS_C1_df_dup_SEP_2020_women_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss6$nombre_region))
After imputation, no missing values remained in this variable.
Type of Plan
We examined the candidate imputations for the type of plan (n=29).
# Majority vote for the type of plan across the 61 imputed data sets.
#
# The `tipo_de_plan_res` value of every imputation (imp1..imp61) is stacked
# in long form, keyed by the row id of the first imputation. Per row, the
# number of "Outpatient" and "Residential" proposals is counted and the
# category proposed by at least 31 of the 61 imputations (a strict majority)
# is kept in `tipo_de_plan_res_imp`; it is NA when neither reaches 31.
tipo_de_plan_res_imputed <-
  lapply(
    paste0("imp", seq_len(61L)),
    function(imp_id) {
      data.frame(
        amelia_fit_imputations_imp1_row = amelia_fit$imputations$imp1$row,
        value = amelia_fit$imputations[[imp_id]]$tipo_de_plan_res,
        stringsAsFactors = FALSE
      )
    }
  ) %>%
  dplyr::bind_rows() %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(
    outpatient = sum(value == "Outpatient"),
    residential = sum(value == "Residential")
  ) %>%
  dplyr::mutate(
    tipo_de_plan_res_imp = dplyr::case_when(
      outpatient >= 31 ~ "Outpatient",
      residential >= 31 ~ "Residential"
    )
  )
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
# Fill missing type of plan with the majority-vote value.
CONS_C1_df_dup_SEP_2020_women_miss8 <-
  CONS_C1_df_dup_SEP_2020_women_miss7 %>%
  dplyr::left_join(
    tipo_de_plan_res_imputed %>%
      dplyr::select(amelia_fit_imputations_imp1_row, tipo_de_plan_res_imp),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    tipo_de_plan_res = factor(
      dplyr::coalesce(
        as.character(tipo_de_plan_res),
        as.character(tipo_de_plan_res_imp)
      )
    )
  ) %>%
  dplyr::select(-tipo_de_plan_res_imp) %>%
  data.table()
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss6$nombre_region))
After imputation, no missing values remained in this variable.
Cause of Discharge
We examined the candidate imputations for the truly missing values of the cause of discharge, excluding values that were missing due to censoring (n=7).
# Rows whose cause of discharge is missing although a discharge date exists.
# The discharge date is looked up in the full data set through a natural join
# on the columns both tables share.
motivo_de_egreso_a_imputar <-
  CONS_C1_df_dup_SEP_2020_women_miss %>%
  dplyr::filter(is.na(motivodeegreso_mod_imp)) %>%
  dplyr::left_join(
    CONS_C1_df_dup_SEP_2020 %>% dplyr::select(row, fech_egres_imp)
  ) %>%
  dplyr::filter(!is.na(fech_egres_imp)) %>%
  dplyr::select(row)
## Joining, by = "row"
# Candidate cause of discharge for the rows listed in
# `motivo_de_egreso_a_imputar`.
#
# Steps:
#   1. Stack `motivodeegreso_mod_imp` from the 61 imputed data sets
#      (imp1..imp61) in long form, keyed by the row id of the first
#      imputation, and keep only the rows to impute.
#   2. Drop illogical proposals: "Death" when `duplicates_filtered > dup`
#      (deaths followed by later treatments).
#   3. Count the proposals per row and keep the MOST frequent one(s).
#      `slice_max()` is required here: `slice_min()` would select the least
#      frequent proposal.
#   4. Flag rows in which more than one category shares the highest count
#      (`ties` = 1).
#   5. Assign the winning category only when there is no tie and the
#      discharge date is before 2019-11-13; "Death" additionally requires
#      `dup == duplicates_filtered`.
motivodeegreso_mod_imp_imputed <-
  lapply(
    paste0("imp", seq_len(61L)),
    function(imp_id) {
      data.frame(
        amelia_fit_imputations_imp1_row = amelia_fit$imputations$imp1$row,
        value = amelia_fit$imputations[[imp_id]]$motivodeegreso_mod_imp,
        stringsAsFactors = FALSE
      )
    }
  ) %>%
  dplyr::bind_rows() %>%
  dplyr::filter(
    amelia_fit_imputations_imp1_row %in% unlist(motivo_de_egreso_a_imputar$row)
  ) %>%
  # filter out illogical cases: deaths with posterior treatments
  dplyr::left_join(
    dplyr::select(CONS_C1_df_dup_SEP_2020, row, dup, duplicates_filtered),
    by = c("amelia_fit_imputations_imp1_row" = "row")
  ) %>%
  dplyr::mutate(
    value_death = dplyr::case_when(
      value == "Death" & duplicates_filtered > dup ~ 1,
      TRUE ~ 0
    )
  ) %>%
  dplyr::filter(value_death != 1) %>%
  dplyr::count(amelia_fit_imputations_imp1_row, value) %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  # most frequent proposal(s) per row; ties are kept and flagged below
  dplyr::slice_max(n, n = 1) %>%
  dplyr::summarise(
    adm_dis = sum(value == "Administrative discharge", na.rm = TRUE),
    death = sum(value == "Death", na.rm = TRUE),
    referral = sum(value == "Referral to another treatment", na.rm = TRUE),
    ter_dis = sum(value == "Therapeutic discharge", na.rm = TRUE),
    dropout = sum(value == "Drop-out", na.rm = TRUE)
  ) %>%
  dplyr::mutate(
    ties = as.numeric((adm_dis + death + referral + ter_dis + dropout) > 1)
  ) %>%
  dplyr::left_join(
    dplyr::select(
      CONS_C1_df_dup_SEP_2020,
      row, motivodeegreso_mod_imp, fech_egres_imp, fech_egres_num, dup,
      duplicates_filtered, evaluacindelprocesoteraputico, tipo_centro_derivacion
    ),
    by = c("amelia_fit_imputations_imp1_row" = "row")
  ) %>%
  dplyr::mutate(
    motivodeegreso_mod_imp_imputation = dplyr::case_when(
      ties == 0 & adm_dis == 1 & fech_egres_imp < "2019-11-13" ~ "Administrative discharge",
      # death is an absorbing state: it should not have posterior treatments
      ties == 0 & death == 1 & fech_egres_imp < "2019-11-13" &
        dup == duplicates_filtered ~ "Death",
      ties == 0 & referral == 1 & fech_egres_imp < "2019-11-13" ~ "Referral to another treatment",
      ties == 0 & ter_dis == 1 & fech_egres_imp < "2019-11-13" ~ "Therapeutic discharge",
      ties == 0 & dropout == 1 & fech_egres_imp < "2019-11-13" ~ "Drop-out",
      # a discharge date on or after 2019-11-13 (the censoring date) can only
      # be an ongoing treatment, so it is left missing, as is any tie
      TRUE ~ NA_character_
    )
  ) %>%
  # having evaluacindelprocesoteraputico means it is not an ongoing treatment
  dplyr::rename("motivodeegreso_mod_imp_original" = "motivodeegreso_mod_imp")
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:
# Merge the imputed cause of discharge back into the working data set.
# Rows whose cause of discharge is missing take the imputed value; all other
# rows keep their recorded value. The helper columns brought in by the first
# join are dropped, and the original (pre-imputation) cause of discharge is
# re-attached from the full data base for later comparison.
CONS_C1_df_dup_SEP_2020_women_miss9 <-
  CONS_C1_df_dup_SEP_2020_women_miss8 %>%
  dplyr::left_join(
    dplyr::select(
      motivodeegreso_mod_imp_imputed,
      amelia_fit_imputations_imp1_row,
      motivodeegreso_mod_imp_original,
      fech_egres_imp,
      fech_egres_num,
      motivodeegreso_mod_imp_imputation
    ),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    motivodeegreso_mod_imp = factor(
      dplyr::case_when(
        # missing cause of discharge: use the imputed one
        is.na(motivodeegreso_mod_imp) ~ motivodeegreso_mod_imp_imputation,
        motivodeegreso_mod_imp_original == "Ongoing treatment" ~ NA_character_,
        TRUE ~ as.character(motivodeegreso_mod_imp)
      )
    )
  ) %>%
  dplyr::select(
    -motivodeegreso_mod_imp_imputation,
    -fech_egres_imp,
    -fech_egres_num,
    -motivodeegreso_mod_imp_original
  ) %>%
  dplyr::left_join(
    dplyr::select(
      CONS_C1_df_dup_SEP_2020,
      row,
      motivodeegreso_mod_imp_original = motivodeegreso_mod_imp
    ),
    by = "row"
  ) %>%
  data.table()
#CONS_C1_df_dup_SEP_2020_women_miss9 %>% janitor::tabyl(motivodeegreso_mod_imp,motivodeegreso_mod_imp_original)
#CONS_C1_df_dup_SEP_2020_women_miss9 %>% janitor::tabyl(motivodeegreso_mod_imp_original)
# Table 3: cross-tabulation of the imputed cause of discharge (rows) against
# the originally recorded cause of discharge (columns), rendered as HTML.
CONS_C1_df_dup_SEP_2020_women_miss9 %>%
  janitor::tabyl(motivodeegreso_mod_imp, motivodeegreso_mod_imp_original) %>%
  knitr::kable(
    format = "html",
    format.args = list(decimal.mark = ".", big.mark = ","),
    caption = "Table 3. Imputed Cause of Discharge vs. Original Cause of Discharge",
    align = rep("c", 101)
  ) %>%
  kableExtra::kable_styling(
    bootstrap_options = c("striped", "hover"),
    font_size = 12
  ) %>%
  kableExtra::add_footnote("Note. NA= Null values", notation = "none") %>%
  kableExtra::scroll_box(width = "100%", height = "375px")
Table 3. Imputed Cause of Discharge vs. Original Cause of Discharge
|
motivodeegreso_mod_imp
|
Late Drop-out
|
Early Drop-out
|
Administrative discharge
|
Therapeutic discharge
|
Referral to another treatment
|
Ongoing treatment
|
Death
|
NA_
|
|
Administrative discharge
|
0
|
0
|
2,440
|
0
|
0
|
0
|
0
|
1
|
|
Early Drop-out
|
0
|
4,460
|
0
|
0
|
0
|
0
|
0
|
0
|
|
Late Drop-out
|
9,416
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
|
Ongoing treatment
|
0
|
0
|
0
|
0
|
0
|
2,424
|
0
|
0
|
|
Referral to another treatment
|
0
|
0
|
0
|
0
|
3,872
|
0
|
0
|
1
|
|
Therapeutic discharge
|
0
|
0
|
0
|
6,832
|
0
|
0
|
0
|
0
|
|
NA
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
5
|
|
Note. NA= Null values
|
#
# Print a message when a row whose original cause of discharge was not
# "Ongoing treatment" ended up with a missing cause of discharge.
# NOTE(review): rows whose original value is NA are dropped by the `!=`
# comparison inside filter(), so cases that could not be imputed are not
# detected here -- confirm this is intended.
if (
  nrow(
    dplyr::filter(
      CONS_C1_df_dup_SEP_2020_women_miss9,
      motivodeegreso_mod_imp_original != "Ongoing treatment",
      is.na(motivodeegreso_mod_imp)
    )
  ) > 0
) {
  "There are missing values on the cause of discharge"
}
A total of 2 cases were not imputed due to ties in the imputed values.
Biopsychosocial involvement
Another variable that is worth imputing is the Biopsychosocial involvement (n= 492). Each missing value was replaced with the category most frequently proposed across the imputations. In case of ties, we selected the tied category with the minimum involvement.
# Ver distintos valores propuestos para sustancia de inciio
#No se ve un patrón de dependencia entre el compromiso biopsicosocial y el estatus de egreso
# table(CONS_C1_df_dup_SEP_2020_women_miss$compromiso_biopsicosocial,
# CONS_C1_df_dup_SEP_2020_women_miss$motivodeegreso_mod_imp)
# Majority vote across the imputed data sets for biopsychosocial involvement.
#
# One column per imputation is collected programmatically from
# `amelia_fit$imputations` (the text reports 61 imputations) instead of being
# listed by hand. Column names reproduce the ones `cbind.data.frame()` used to
# derive from the hand-written expressions, so the id column is still called
# `amelia_fit_imputations_imp1_row` after `clean_names()`.
# Assumes every element of `amelia_fit$imputations` is a data frame holding
# `compromiso_biopsicosocial`, with rows in the same order as `imp1$row`
# -- TODO confirm.
#
# Result: one row per `row` id with the number of imputations proposing each
# category (`severe_3`, `mod_2`, `mild_1`), the chosen category
# (`comp_biopsisoc_imp`) and a `ties` flag (1 = no strict winner).
comp_biopsisoc_imputed <-
  c(
    list(amelia_fit$imputations$imp1$row),
    lapply(
      amelia_fit$imputations,
      function(imp) imp$compromiso_biopsicosocial
    )
  ) %>%
  stats::setNames(
    c(
      "amelia_fit$imputations$imp1$row",
      paste0(
        "amelia_fit$imputations$",
        names(amelia_fit$imputations),
        "$compromiso_biopsicosocial"
      )
    )
  ) %>%
  do.call(cbind.data.frame, .) %>%
  melt(id.vars = "amelia_fit$imputations$imp1$row") %>%
  janitor::clean_names() %>%
  # No explicit sort is needed: summarise() returns one row per group,
  # ordered by the grouping key.
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  # Categories: 1-Mild, 2-Moderate, 3-Severe
  dplyr::summarise(
    severe_3 = sum(value == "3-Severe", na.rm = TRUE),
    mod_2 = sum(value == "2-Moderate", na.rm = TRUE),
    mild_1 = sum(value == "1-Mild", na.rm = TRUE)
  ) %>%
  dplyr::ungroup() %>%
  # 1) Strict majority: a category wins only if it beats both of the others.
  dplyr::mutate(
    comp_biopsisoc_imp = dplyr::case_when(
      (severe_3 > mild_1) & (severe_3 > mod_2) ~ "3-Severe",
      (mod_2 > mild_1) & (mod_2 > severe_3) ~ "2-Moderate",
      (mild_1 > mod_2) & (mild_1 > severe_3) ~ "1-Mild"
    )
  ) %>%
  # 2) Resolve ties: among the tied leaders, keep the lowest involvement
  #    (Mild before Moderate). A three-way tie stays NA.
  dplyr::mutate(
    ties = dplyr::case_when(is.na(comp_biopsisoc_imp) ~ 1, TRUE ~ 0)
  ) %>%
  dplyr::mutate(
    comp_biopsisoc_imp = dplyr::case_when(
      ties == 1 & ((mild_1 > mod_2) | (mild_1 > severe_3)) ~ "1-Mild",
      ties == 1 & ((mod_2 > mild_1) | (mod_2 > severe_3)) ~ "2-Moderate",
      TRUE ~ comp_biopsisoc_imp
    )
  )
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
##
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)
# Fill missing biopsychosocial involvement with the value chosen from the
# imputations, then rebuild the variable as an ordered factor
# (1-Mild < 2-Moderate < 3-Severe). Helper columns are dropped afterwards.
CONS_C1_df_dup_SEP_2020_women_miss10 <-
  CONS_C1_df_dup_SEP_2020_women_miss9 %>%
  dplyr::left_join(
    dplyr::select(
      comp_biopsisoc_imputed,
      amelia_fit_imputations_imp1_row,
      comp_biopsisoc_imp
    ),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    compromiso_biopsicosocial = factor(
      dplyr::case_when(
        is.na(compromiso_biopsicosocial) ~ comp_biopsisoc_imp,
        TRUE ~ as.character(compromiso_biopsicosocial)
      )
    )
  ) %>%
  dplyr::mutate(
    compromiso_biopsicosocial = parse_factor(
      as.character(compromiso_biopsicosocial),
      levels = c("1-Mild", "2-Moderate", "3-Severe"),
      ordered = TRUE,
      trim_ws = TRUE,
      include_na = FALSE,
      locale = locale(encoding = "UTF-8")
    )
  ) %>%
  dplyr::select(-comp_biopsisoc_imp, -motivodeegreso_mod_imp_original) %>%
  data.table()
As a result of the imputations, there were no missing values once imputed.
Tenure status of households
Another variable that is worth imputing is the Tenure status of households (n= 1,370). We selected the value most frequently proposed across the imputations. In case of ties, we kept what we thought was the most vulnerable value (discarding, in this order, “Owner”, “Renting” and “Transferred dwellings” values).
# Most frequently imputed tenure status per row.
#
# One column per imputation is collected programmatically from
# `amelia_fit$imputations` (the text reports 61 imputations) instead of being
# listed by hand. Column names reproduce the ones `cbind.data.frame()` used to
# derive from the hand-written expressions, so the id column is still called
# `amelia_fit_imputations_imp1_row` after `clean_names()`.
# Assumes every element of `amelia_fit$imputations` is a data frame holding
# `tenencia_de_la_vivienda_mod`, with rows in the same order as `imp1$row`
# -- TODO confirm.
#
# Result: for every `row` id, the value(s) proposed by the largest number of
# imputations together with that count `n`. Ties are kept on purpose (several
# rows per id); they are resolved in a later step.
tenencia_de_la_vivienda_mod_imputed <-
  c(
    list(amelia_fit$imputations$imp1$row),
    lapply(
      amelia_fit$imputations,
      function(imp) imp$tenencia_de_la_vivienda_mod
    )
  ) %>%
  stats::setNames(
    c(
      "amelia_fit$imputations$imp1$row",
      paste0(
        "amelia_fit$imputations$",
        names(amelia_fit$imputations),
        "$tenencia_de_la_vivienda_mod"
      )
    )
  ) %>%
  do.call(cbind.data.frame, .) %>%
  melt(id.vars = "amelia_fit$imputations$imp1$row") %>%
  janitor::clean_names() %>%
  # how many imputations proposed each value for each row
  dplyr::count(amelia_fit_imputations_imp1_row, value) %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  # keep the most frequent value(s); top_n() retains ties
  dplyr::top_n(1, n) %>%
  dplyr::ungroup()
#tenencia_de_la_vivienda_mod_imputed %>%
# pivot_wider(id_cols="amelia_fit_imputations_imp1_row",names_from="value", values_from="n", values_fill=0) %>%
# dplyr::ungroup()
# Tie resolution for tenure status: restrict to row ids that still have more
# than one candidate value, then successively down-weight (count set to 0)
# "Owner", "Renting" and "Transferred dwellings", keeping the top-count
# value(s) after each step.
tenencia_de_la_vivienda_mod_imputed_dup <-
  tenencia_de_la_vivienda_mod_imputed %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::mutate(num = n()) %>%
  dplyr::filter(num > 1) %>%
  dplyr::ungroup() %>%
  # 1) "Owner" is discarded when it is among the tied maxima
  dplyr::mutate(
    n = dplyr::case_when(value == "Owner" ~ 0, TRUE ~ as.numeric(n))
  ) %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::top_n(1, n) %>%
  # 2) "Renting" loses against the remaining tied values
  dplyr::mutate(
    n = dplyr::case_when(value == "Renting" ~ 0, TRUE ~ as.numeric(n))
  ) %>%
  dplyr::top_n(1, n) %>%
  # 3) "Transferred dwellings" loses against the remaining tied values
  dplyr::mutate(
    n = dplyr::case_when(
      value == "Transferred dwellings" ~ 0,
      TRUE ~ as.numeric(n)
    )
  ) %>%
  dplyr::top_n(1, n) %>%
  dplyr::ungroup()
# Final tenure-status imputation: combine the most frequent values with the
# outcome of the tie resolution and drop the tied candidates that were not
# selected.
tenencia_de_la_vivienda_mod_imputed_final <-
  tenencia_de_la_vivienda_mod_imputed %>%
  dplyr::left_join(
    tenencia_de_la_vivienda_mod_imputed_dup,
    by = c("amelia_fit_imputations_imp1_row", "value")
  ) %>%
  # `num` is only present for candidates that survived the tie resolution;
  # row ids without any such candidate get -Inf here (warning silenced).
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::mutate(sum = suppressWarnings(max(num, na.rm = TRUE))) %>%
  dplyr::ungroup() %>%
  # Discard candidates of tied row ids that the tie resolution did not keep
  # (no matching count `n.y` from the tie-resolution table).
  dplyr::mutate(
    descartar = dplyr::case_when(sum > 1 & is.na(n.y) ~ 1, TRUE ~ 0)
  ) %>%
  dplyr::filter(descartar == 0)
# Sanity check: more imputed values than distinct rows in the data set means
# that at least one row still has several candidate values.
ifelse(
  test = nrow(tenencia_de_la_vivienda_mod_imputed_final) /
    length(unique(CONS_C1_df_dup_SEP_2020_women_miss10$row)) > 1,
  yes = "There are still more than one value in the imputation",
  no = ""
)
## [1] ""
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)
# Fill missing tenure status with the value selected from the imputations;
# observed values are kept as they are.
CONS_C1_df_dup_SEP_2020_women_miss11 <-
  CONS_C1_df_dup_SEP_2020_women_miss10 %>%
  dplyr::left_join(
    dplyr::select(
      tenencia_de_la_vivienda_mod_imputed_final,
      amelia_fit_imputations_imp1_row,
      value
    ),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    tenencia_de_la_vivienda_mod = factor(
      dplyr::case_when(
        is.na(tenencia_de_la_vivienda_mod) ~ value,
        TRUE ~ as.character(tenencia_de_la_vivienda_mod)
      )
    )
  ) %>%
  dplyr::select(-value) %>%
  data.table()
As a result of the imputations, there were no missing values once imputed.
Number of children (max. Value)
This numeric variable also had a considerable number of missing values (n= 99).
# Density plot for `numero_de_hijos_mod` drawn from the fitted imputation
# object `amelia_fit` (the figure discussed in the paragraph that follows).
compare.density(amelia_fit,var="numero_de_hijos_mod", main=NULL)
As seen in the figure above, most of the imputations were between 1 and 3 children, leaving little room for an imputation of no children or of more than 3. We imputed these values by rounding the mean of the 61 candidate values to the nearest integer.
#On this graph, a y = x line indicates the line of perfect agreement; that is, if the imputation model was a perfect predictor of the true value, all the imputations would fall on this line
# Optional overimputation diagnostic. It is disabled by default
# (`no_mostrar` is 0); set `no_mostrar` to 1 to run it under the elapsed-time
# limit given by `nn_K * 500`.
# Fix: a stray `w` token after the opening brace was removed; it would have
# raised "object 'w' not found" as soon as the flag was enabled.
# NOTE(review): the diagnostic targets "edad_al_ing" although this section
# deals with the number of children -- confirm the intended variable.
no_mostrar <- 0
if (no_mostrar == 1) {
  res <- {
    setTimeLimit(nn_K * 500)
    ovr_imp_edad_ini_cons <- overimpute(amelia_fit, var = "edad_al_ing")
  }
}
# Count the users (hash_key) that have at least one treatment with a missing
# number of children and more than one treatment row overall.
paste0(
  "Users that had more than one treatment with missing values in the no. of children: ",
  CONS_C1_df_dup_SEP_2020_women_miss %>%
    dplyr::group_by(hash_key) %>%
    dplyr::mutate(na_numero_de_hijos_mod = sum(is.na(numero_de_hijos_mod))) %>%
    dplyr::ungroup() %>%
    dplyr::filter(na_numero_de_hijos_mod > 0) %>%
    dplyr::count(hash_key) %>%
    dplyr::filter(n > 1) %>%
    nrow()
)
## [1] "Users that had more than one treatment with missing values in the no. of children: 29"
# Average imputed number of children per row, rounded to a whole number.
#
# One column per imputation is collected programmatically from
# `amelia_fit$imputations` (the text reports 61 imputations) instead of being
# listed by hand. Column names reproduce the ones `cbind.data.frame()` used to
# derive from the hand-written expressions, so the id column is still called
# `amelia_fit_imputations_imp1_row` after `clean_names()`.
# Assumes every element of `amelia_fit$imputations` is a data frame holding
# `numero_de_hijos_mod`, with rows in the same order as `imp1$row`
# -- TODO confirm.
#
# Result: one row per `row` id with `avg_numero_de_hijos_mod_imp`.
numero_de_hijos_mod_imputed <-
  c(
    list(amelia_fit$imputations$imp1$row),
    lapply(
      amelia_fit$imputations,
      function(imp) imp$numero_de_hijos_mod
    )
  ) %>%
  stats::setNames(
    c(
      "amelia_fit$imputations$imp1$row",
      paste0(
        "amelia_fit$imputations$",
        names(amelia_fit$imputations),
        "$numero_de_hijos_mod"
      )
    )
  ) %>%
  do.call(cbind.data.frame, .) %>%
  melt(id.vars = "amelia_fit$imputations$imp1$row") %>%
  janitor::clean_names() %>%
  # negative imputed values are mirrored to their absolute value
  dplyr::mutate(value = abs(value)) %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(
    avg_numero_de_hijos_mod_imp = round(mean(value, na.rm = TRUE), 0)
  ) %>%
  dplyr::ungroup()
# Replace the missing values:
# a missing number of children takes the rounded average of the imputed
# candidates; observed values are kept (both coerced to numeric).
CONS_C1_df_dup_SEP_2020_women_miss12 <-
  CONS_C1_df_dup_SEP_2020_women_miss11 %>%
  dplyr::left_join(
    numero_de_hijos_mod_imputed,
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    numero_de_hijos_mod = dplyr::case_when(
      is.na(numero_de_hijos_mod) ~ as.numeric(avg_numero_de_hijos_mod_imp),
      TRUE ~ as.numeric(numero_de_hijos_mod)
    )
  ) %>%
  dplyr::select(-avg_numero_de_hijos_mod_imp)
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss12$numero_de_hijos_mod))
As a result of the imputations, there were no missing values once imputed.
Number of children into a residential treatment
This numeric variable also had a great number of missing values (n= 3,399).
# Density plot for `num_hijos_trat_res_mod` drawn from the fitted imputation
# object `amelia_fit` (the figure discussed in the paragraph that follows).
compare.density(amelia_fit,var="num_hijos_trat_res_mod")
As seen in the figure above, most of the imputations adopted the value 0, consistent with the distribution of this variable. Because the imputed values are not discrete, we averaged the 61 candidate values for each case and rounded the result to the nearest integer.
#On this graph, a y = x line indicates the line of perfect agreement; that is, if the imputation model was a perfect predictor of the true value, all the imputations would fall on this line
# Optional overimputation diagnostic. It is disabled by default
# (`no_mostrar` is 0); set `no_mostrar` to 1 to run it under the elapsed-time
# limit given by `nn_K * 500`.
# NOTE(review): the diagnostic targets "edad_al_ing" although this section
# deals with children in residential treatment -- confirm the intended
# variable.
no_mostrar <- 0
if (no_mostrar == 1) {
  res <- {
    setTimeLimit(nn_K * 500)
    ovr_imp_edad_ini_cons <- overimpute(amelia_fit, var = "edad_al_ing")
  }
}
# Count the users (hash_key) that have at least one treatment with a missing
# number of children in residential treatment and more than one treatment row
# overall.
paste0(
  "Users that had more than one treatment with missing values in the no. of children in a residential treatment: ",
  CONS_C1_df_dup_SEP_2020_women_miss %>%
    dplyr::group_by(hash_key) %>%
    dplyr::mutate(
      na_num_hijos_trat_res_mod = sum(is.na(num_hijos_trat_res_mod))
    ) %>%
    dplyr::ungroup() %>%
    dplyr::filter(na_num_hijos_trat_res_mod > 0) %>%
    dplyr::count(hash_key) %>%
    dplyr::filter(n > 1) %>%
    nrow()
)
## [1] "Users that had more than one treatment with missing values in the no. of children in a residential treatment: 1240"
# Average imputed number of children in residential treatment per row,
# rounded to a whole number.
#
# One column per imputation is collected programmatically from
# `amelia_fit$imputations` (the text reports 61 imputations) instead of being
# listed by hand. Column names reproduce the ones `cbind.data.frame()` used to
# derive from the hand-written expressions, so the id column is still called
# `amelia_fit_imputations_imp1_row` after `clean_names()`.
# Assumes every element of `amelia_fit$imputations` is a data frame holding
# `num_hijos_trat_res_mod`, with rows in the same order as `imp1$row`
# -- TODO confirm.
#
# Candidate values whose rounded value exceeds the (already imputed) total
# number of children of the row are left out of the average. Rows with no
# remaining candidate do not appear in the result.
num_hijos_trat_res_mod_imputed <-
  c(
    list(amelia_fit$imputations$imp1$row),
    lapply(
      amelia_fit$imputations,
      function(imp) imp$num_hijos_trat_res_mod
    )
  ) %>%
  stats::setNames(
    c(
      "amelia_fit$imputations$imp1$row",
      paste0(
        "amelia_fit$imputations$",
        names(amelia_fit$imputations),
        "$num_hijos_trat_res_mod"
      )
    )
  ) %>%
  do.call(cbind.data.frame, .) %>%
  melt(id.vars = "amelia_fit$imputations$imp1$row") %>%
  janitor::clean_names() %>%
  dplyr::left_join(
    dplyr::select(
      CONS_C1_df_dup_SEP_2020_women_miss12,
      row,
      numero_de_hijos_mod
    ),
    by = c("amelia_fit_imputations_imp1_row" = "row")
  ) %>%
  # negative imputed values are mirrored to their absolute value
  dplyr::mutate(
    value = abs(value),
    rounded_value = round(value, 0)
  ) %>%
  # drop candidates above the total number of children; rows where the
  # comparison is NA are dropped as well, as before
  dplyr::mutate(discard = ifelse(numero_de_hijos_mod < rounded_value, 1, 0)) %>%
  dplyr::filter(discard == 0) %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(
    avg_num_hijos_trat_res_mod_imp = round(mean(value, na.rm = TRUE), 0)
  ) %>%
  dplyr::ungroup()
# Replace the missing values:
# a missing number of children in residential treatment takes the rounded
# average of the imputed candidates; observed values are kept (both coerced
# to numeric).
CONS_C1_df_dup_SEP_2020_women_miss13 <-
  CONS_C1_df_dup_SEP_2020_women_miss12 %>%
  dplyr::left_join(
    num_hijos_trat_res_mod_imputed,
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    num_hijos_trat_res_mod = dplyr::case_when(
      is.na(num_hijos_trat_res_mod) ~
        as.numeric(avg_num_hijos_trat_res_mod_imp),
      TRUE ~ as.numeric(num_hijos_trat_res_mod)
    )
  ) %>%
  dplyr::select(-avg_num_hijos_trat_res_mod_imp)
# Sanity check: the imputation joins must not have changed the number of rows.
# The previous `ifelse()` call had no `no` argument, so a mismatch failed with
# the unrelated error 'argument "no" is missing'. A mismatch still stops the
# run, now with an explicit message.
if (
  nrow(CONS_C1_df_dup_SEP_2020_women_miss13) ==
    nrow(CONS_C1_df_dup_SEP_2020_women_miss)
) {
  "We did not add an additional row to the data base"
} else {
  stop(
    "The number of rows changed during the imputation joins: ",
    nrow(CONS_C1_df_dup_SEP_2020_women_miss13), " vs. ",
    nrow(CONS_C1_df_dup_SEP_2020_women_miss),
    call. = FALSE
  )
}
## [1] "We did not add an additional row to the data base"
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss12$numero_de_hijos_mod))
As a result of the imputations, there were no missing values once imputed.